# Heavy vs. light flowers
# Callin Switzer
# 12 October 2016
# update 8 Dec 2017
# 
# Compares the bees' frequency when they are buzzing on 
# heavy (metal added) vs. light flowers
# Note: all pores were glued shut
#install packages
ipak <- function(pkg){
     new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
     if(length(new.pkg)) install.packages(new.pkg, dependencies = TRUE)
     sapply(pkg, require, character.only = TRUE)
}

packages <- c("ggplot2", "reshape2", 'lme4', 'sjPlot', "multcomp", "plyr", "effects")
ipak(packages)
## Loading required package: ggplot2
## Loading required package: reshape2
## Loading required package: lme4
## Loading required package: Matrix
## Loading required package: sjPlot
## Warning in checkMatrixPackageVersion(): Package version inconsistency detected.
## TMB was built with Matrix version 1.2.10
## Current Matrix version is 1.2.11
## Please re-install 'TMB' from source or restore original 'Matrix' package
## Loading required package: multcomp
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## Loading required package: MASS
## 
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
## 
##     geyser
## Loading required package: plyr
## Loading required package: effects
## Loading required package: carData
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
##  ggplot2 reshape2     lme4   sjPlot multcomp     plyr  effects 
##     TRUE     TRUE     TRUE     TRUE     TRUE     TRUE     TRUE
# set ggplot theme
theme_set(theme_classic())

# define data and figure directories
dataDir <- "/Users/cswitzer/Dropbox/SonicationBehavior/SonBehData"
figDir <- "/Users/cswitzer/Dropbox/SonicationBehavior/SonBehFigs"


# print system info
print(paste("last run ", Sys.time()))
## Warning in as.POSIXlt.POSIXct(x, tz): unknown timezone 'zone/tz/2017c.1.0/
## zoneinfo/America/Los_Angeles'
## [1] "last run  2017-12-08 20:32:49"
print(R.version)
##                _                           
## platform       x86_64-apple-darwin15.6.0   
## arch           x86_64                      
## os             darwin15.6.0                
## system         x86_64, darwin15.6.0        
## status                                     
## major          3                           
## minor          4.2                         
## year           2017                        
## month          09                          
## day            28                          
## svn rev        73368                       
## language       R                           
## version.string R version 3.4.2 (2017-09-28)
## nickname       Short Summer

Read in data and do some visualizations

new_df = read.csv(file.path(dataDir, "02_HeavyLight_cleaned.csv"))
head(new_df)
##   freq     amp                  datetime rewNum
## 1  250 0.17945  2016_09_28__09_59_13_112      1
## 2  300 0.56657  2016_09_28__09_59_14_520      2
## 3  400 0.68371  2016_09_28__09_59_15_030      3
## 4  410 0.80118  2016_09_28__09_59_15_707      4
## 5  370 0.52666  2016_09_28__09_59_16_277      5
## 6  360 0.31541  2016_09_28__09_59_17_096      6
##                                     accFile beeID hive   IT
## 1 2016_09_28__09_59_13_112_220_450_test.txt     7    3 3.98
## 2 2016_09_28__09_59_14_520_220_450_test.txt     7    3 3.98
## 3 2016_09_28__09_59_15_030_220_450_test.txt     7    3 3.98
## 4 2016_09_28__09_59_15_707_220_450_test.txt     7    3 3.98
## 5 2016_09_28__09_59_16_277_220_450_test.txt     7    3 3.98
## 6 2016_09_28__09_59_17_096_220_450_test.txt     7    3 3.98
##                                             accFileAndFolder
## 1 Bee7_28Sept2016_Hive3_S_W/2016_09_28__09_58_58_ampFreq.txt
## 2 Bee7_28Sept2016_Hive3_S_W/2016_09_28__09_58_58_ampFreq.txt
## 3 Bee7_28Sept2016_Hive3_S_W/2016_09_28__09_58_58_ampFreq.txt
## 4 Bee7_28Sept2016_Hive3_S_W/2016_09_28__09_58_58_ampFreq.txt
## 5 Bee7_28Sept2016_Hive3_S_W/2016_09_28__09_58_58_ampFreq.txt
## 6 Bee7_28Sept2016_Hive3_S_W/2016_09_28__09_58_58_ampFreq.txt
##   dateFrozenOrMarked treatment
## 1          28-Sep-16      sham
## 2          28-Sep-16      sham
## 3          28-Sep-16      sham
## 4          28-Sep-16      sham
## 5          28-Sep-16      sham
## 6          28-Sep-16      sham
# show histograms of frequencies
hist(new_df$freq, breaks = seq(215, 455, by = 10))

# Calculate average for each bee and for each treatment
frqMeans <- as.data.frame(tapply(X = new_df$freq, INDEX = list(new_df$beeID, new_df$treatment), mean))
frqMeans$beeID <- row.names(frqMeans)
frqMeans
##            sham weighted  beeID
## 1      344.3333 347.0833      1
## 10     331.2000 382.5000     10
## 11     411.7391 387.6923     11
## 12     344.8276 339.2500     12
## 14     324.0625 281.6000     14
## 15     367.5000 376.3158     15
## 16     357.0588 386.0606     16
## 17     314.0741 341.6000     17
## 18     366.4706 345.0000     18
## 19     375.0000 379.2308     19
## 2      298.5294 322.9167      2
## 20     359.8148 313.3333     20
## 21     331.3158 310.7143     21
## 22     399.4286 388.2857     22
## 23     348.2759 351.2821     23
## 24     287.7778 273.3333     24
## 25     345.0000 352.3684     25
## 26     369.6000 392.7273     26
## 27     366.6667 350.0000     27
## 28     364.4444 374.1935     28
## 29     291.2500 319.3333     29
## 3      399.0323 388.9655      3
## 30     368.0769 369.0323     30
## 31     273.5294 320.4167     31
## 32     325.6000 246.1538     32
## 4      356.6667 371.3636      4
## 5      370.7692 353.7143      5
## 6      284.2857 326.5714      6
## 7      366.9697 396.3415      7
## 8      333.8462 342.0833      8
## 9      344.4000 384.4444      9
## blue   319.1429 333.8462   blue
## gold   374.5098 365.2174   gold
## orange 309.6875 319.1000 orange
## pink   385.3333 367.5000   pink
## white  319.7674 326.9444  white
# conver to long format for ggplot-ing
frqLong <- melt(frqMeans, id.vars = "beeID", measure.vars =  c("sham", "weighted"), 
     variable.name = "trt", value.name = "frq")
nrow(frqLong)
## [1] 72
ggplot(frqLong, aes(x = trt, y = frq)) + 
     geom_boxplot() + 
    geom_line(aes(group = beeID))+
     geom_point()

ggplot(frqLong, aes(x = trt, y = frq)) + 
     geom_boxplot() + 
     labs(y = "Buzz Frequency (Hz)", x = "Flower treatment")

#ggsave(file.path(figDir, "heavyLight_frq.pdf"), width = 5, height = 4)


ggplot(new_df, aes(y = freq, x = treatment, fill = treatment)) + 
     geom_violin() + 
     geom_point(position= position_jitter(width = 0.2, height = 0), alpha = 0.2)+
     facet_wrap(~beeID)

# print some descriptive statistics
nrow(new_df)
## [1] 2360
unique(new_df$hive)
## [1] 3 4
# Calculate average amplitude for each bee and for each treatment
ampMeans <- as.data.frame(tapply(X = new_df$amp, INDEX = list(new_df$beeID, new_df$treatment), mean))

ampMeans$beeID <- row.names(ampMeans)
ampMeans
##             sham   weighted  beeID
## 1      0.8076493 0.49916833      1
## 10     0.4927912 0.31361531     10
## 11     0.3515139 0.23277462     11
## 12     0.4037179 0.36835150     12
## 14     0.2853947 0.22527680     14
## 15     0.4459104 0.39965079     15
## 16     0.5093180 0.37277576     16
## 17     0.3923870 0.34797660     17
## 18     0.3210409 0.23870071     18
## 19     0.4441223 0.25072667     19
## 2      0.3571368 0.34654847      2
## 20     0.4820570 0.35996394     20
## 21     0.4547376 0.35084893     21
## 22     0.3291960 0.25122486     22
## 23     0.6451645 0.46830538     23
## 24     0.4207263 0.23366458     24
## 25     0.3740931 0.20392921     25
## 26     0.2856976 0.22539606     26
## 27     0.4034414 0.31427939     27
## 28     0.4550385 0.40624774     28
## 29     0.4407475 0.22762267     29
## 3      0.4986029 0.57203034      3
## 30     0.5540012 0.29531161     30
## 31     0.5814774 0.31036667     31
## 32     0.3561664 0.05094538     32
## 4      0.3544025 0.38607545      4
## 5      0.5224635 0.33527800      5
## 6      0.2548671 0.37360600      6
## 7      0.4302712 0.48336415      7
## 8      0.7275065 0.28751583      8
## 9      0.3945476 0.29620407      9
## blue   0.3237263 0.19929000   blue
## gold   0.3532037 0.21221913   gold
## orange 0.5373041 0.46789970 orange
## pink   0.3114410 0.40220477   pink
## white  0.2293505 0.15642500  white
ampLong <- melt(ampMeans, id.vars = "beeID", measure.vars =  c("sham", "weighted"), 
                variable.name = "trt", value.name = "amp")

ggplot(ampLong, aes(x = trt, y = amp)) + 
     geom_boxplot() + 
     geom_point() 

ggplot(ampLong, aes(x = trt, y = amp)) + 
     geom_boxplot() + 
     geom_point() +
  geom_line(aes(group = beeID))

ggplot(ampLong, aes(x = trt, y = amp)) + 
     geom_boxplot() + 
     geom_point() + 
     labs(y = "Buzz Amplitude (V)", x = "Flower treatment")

#ggsave(file.path(figDir, 'heavyLightAmp.pdf'), width = 5, heigh = 4)

nrow(ampLong)
## [1] 72

Modeling frequency with lmer

new_df$hive = as.factor(new_df$hive)

m1 = lmer(freq ~ treatment+IT+  hive + (1|beeID), data = new_df, REML = FALSE)
summary(m1)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: freq ~ treatment + IT + hive + (1 | beeID)
##    Data: new_df
## 
##      AIC      BIC   logLik deviance df.resid 
##  25150.1  25184.7 -12569.0  25138.1     2354 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3652 -0.4741  0.1870  0.6837  2.3281 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  beeID    (Intercept)  871     29.51   
##  Residual             2357     48.55   
## Number of obs: 2360, groups:  beeID, 36
## 
## Fixed effects:
##                   Estimate Std. Error t value
## (Intercept)        355.250     59.273   5.993
## treatmentweighted    2.614      2.064   1.267
## IT                  -1.852     14.875  -0.125
## hive4               -4.128     10.909  -0.378
## 
## Correlation of Fixed Effects:
##             (Intr) trtmnt IT    
## trtmntwghtd -0.002              
## IT          -0.995 -0.017       
## hive4       -0.047  0.000 -0.009
# interaction that may be important, based on domain knowledge
m1_1 = lmer(freq ~ treatment*IT+  hive + (1|beeID), data = new_df, REML = FALSE)
summary(m1_1)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: freq ~ treatment * IT + hive + (1 | beeID)
##    Data: new_df
## 
##      AIC      BIC   logLik deviance df.resid 
##  25147.4  25187.8 -12566.7  25133.4     2353 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3263 -0.4723  0.1862  0.6953  2.3652 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  beeID    (Intercept)  883.5   29.72   
##  Residual             2351.6   48.49   
## Number of obs: 2360, groups:  beeID, 36
## 
## Fixed effects:
##                      Estimate Std. Error t value
## (Intercept)           385.150     61.254   6.288
## treatmentweighted     -48.137     23.516  -2.047
## IT                     -9.417     15.378  -0.612
## hive4                  -4.044     10.983  -0.368
## treatmentweighted:IT   12.750      5.885   2.167
## 
## Correlation of Fixed Effects:
##             (Intr) trtmnt IT     hive4 
## trtmntwghtd -0.225                     
## IT          -0.995  0.225              
## hive4       -0.045 -0.003 -0.010       
## trtmntwg:IT  0.225 -0.996 -0.227  0.004
BIC(m1, m1_1) # stay with model 1 (lower BIC)
##      df      BIC
## m1    6 25184.69
## m1_1  7 25187.77
m2 = lmer(freq ~ hive + IT+ (1|beeID), data = new_df, REML = FALSE)
BIC(m1, m2) # treatment doesn't make the model better
##    df      BIC
## m1  6 25184.69
## m2  5 25178.53
anova(m1, m2) # p-value for paper
## Data: new_df
## Models:
## m2: freq ~ hive + IT + (1 | beeID)
## m1: freq ~ treatment + IT + hive + (1 | beeID)
##    Df   AIC   BIC logLik deviance  Chisq Chi Df Pr(>Chisq)
## m2  5 25150 25178 -12570    25140                         
## m1  6 25150 25185 -12569    25138 1.6039      1     0.2054
# p-value for hive
m3 = lmer(freq ~ treatment + (1|beeID), data = new_df, REML = FALSE)
anova(m1, m3)
## Data: new_df
## Models:
## m3: freq ~ treatment + (1 | beeID)
## m1: freq ~ treatment + IT + hive + (1 | beeID)
##    Df   AIC   BIC logLik deviance  Chisq Chi Df Pr(>Chisq)
## m3  4 25146 25169 -12569    25138                         
## m1  6 25150 25185 -12569    25138 0.1592      2     0.9235
# diagnostics
m1 <- update(m1, .~., REML =TRUE)
summary(m1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: freq ~ treatment + IT + hive + (1 | beeID)
##    Data: new_df
## 
## REML criterion at convergence: 25115.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3676 -0.4684  0.1873  0.6825  2.3319 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  beeID    (Intercept)  954.3   30.89   
##  Residual             2357.8   48.56   
## Number of obs: 2360, groups:  beeID, 36
## 
## Fixed effects:
##                   Estimate Std. Error t value
## (Intercept)        355.173     61.937   5.734
## treatmentweighted    2.614      2.065   1.266
## IT                  -1.833     15.543  -0.118
## hive4               -4.129     11.398  -0.362
## 
## Correlation of Fixed Effects:
##             (Intr) trtmnt IT    
## trtmntwghtd -0.002              
## IT          -0.995 -0.016       
## hive4       -0.047  0.000 -0.009
plot(m1)

qqnorm(ranef(m1)$beeID[[1]])
qqline(ranef(m1)$beeID[[1]])

sjp.lmer(m1, type = "re", sort = TRUE) # plot random effects to find any outliers
## Plotting random effects...

sjp.lmer(m1, type = 'fe', sort = TRUE, p.kr = FALSE) # plot fixed effects
## Computing p-values via Wald-statistics approximation (treating t as Wald z).

Bootstrap CI’s for figure for paper

# set number of bootstrap replicates for models
nbootSims = 1000

# using hive 5, since it's the one with the most data
# calculate an average IT for prediction
ITmean = mean(tapply(new_df$IT, INDEX = new_df$beeID, FUN = function(x) x[1] ))
pframe <- data.frame(treatment = levels(droplevels(new_df$treatment)),IT = ITmean, hive = factor(4, levels = levels(new_df$hive)),  beeID = 99999)
pframe$freq <- 0
pp <- predict(m1, newdata = pframe, re.form=NA, type = 'response') # re.form sets all random effects to 0


### Calculate CI's (using bootstrap, not accounting for random effects)
bb2 <- bootMer(m1, FUN=function(x) predict(x, pframe, re.form=NA, type = 'response'), nsim = nbootSims)
bb2_se <-apply(bb2$t,2,function(x) quantile(x, probs = c(0.025, 0.975)))
pframe$blo<-bb2_se[1,]
pframe$bhi<-bb2_se[2,]
pframe$predMean <- pp
pframe <- pframe[, c('treatment', "blo", "bhi", "predMean")]

Make frequency plots for paper

# "Mean and bootstrap CI based on fixed-effects uncertainty ONLY"
g0 <- ggplot(pframe, aes(x=treatment, y=predMean))+
     geom_point()+ 
     labs(y = "Sonication frequency (Hz)", x = "Flower treatment") + 
     geom_errorbar(aes(ymin = blo, ymax = bhi), width = 0.1)+
     theme(axis.text.x = element_text(angle = 45, hjust = 1), 
           legend.position = 'none') +
     theme_classic() 
g0

ggsave(plot = g0, filename = file.path(figDir, "HeavyLight_PredsAndCI_freq.pdf"), width =4, height = 3)

Modeling amplitude with lmer

First, convert amplitude (originally measured in Volts) to m/s/s, using the accelerometer calibration

# convert amplitude to m/s/s

# conversion factor = 10.17 mv/(m/s/s)
new_df$amp_acc = (new_df$amp * 1000) / 10.17

ma1 = lmer(amp_acc ~ treatment + IT +  hive + (1|beeID), data = new_df, REML = FALSE)
ma1_1 = lmer(amp_acc ~ treatment * IT +  hive + (1|beeID), data = new_df, REML = FALSE)
BIC(ma1, ma1_1) # interaction is ever so slightly better
##       df      BIC
## ma1    6 21221.64
## ma1_1  7 21221.31
ma2 <- update(ma1_1, .~. - hive)
BIC(ma1_1, ma2) # hive not important
##       df      BIC
## ma1_1  7 21221.31
## ma2    6 21214.53
ma3 <- update(ma2, .~., REML = TRUE)
summary(ma2) # final model to report
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: amp_acc ~ treatment + IT + (1 | beeID) + treatment:IT
##    Data: new_df
## 
##      AIC      BIC   logLik deviance df.resid 
##  21179.9  21214.5 -10584.0  21167.9     2354 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.6197 -0.6795 -0.1395  0.4918  5.0480 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  beeID    (Intercept)  76.04    8.72   
##  Residual             443.09   21.05   
## Number of obs: 2360, groups:  beeID, 36
## 
## Fixed effects:
##                      Estimate Std. Error t value
## (Intercept)            19.272     18.858   1.022
## treatmentweighted     -39.453     10.194  -3.870
## IT                      5.913      4.740   1.247
## treatmentweighted:IT    7.254      2.551   2.844
## 
## Correlation of Fixed Effects:
##             (Intr) trtmnt IT    
## trtmntwghtd -0.317              
## IT          -0.996  0.316       
## trtmntwg:IT  0.318 -0.996 -0.320
# diagnostics
# diagnostics
plot(ma3)

qqnorm(ranef(ma3)$beeID[[1]])
qqline(ranef(ma3)$beeID[[1]])

sjp.lmer(ma3) # plot random effects to find any outliers
## Plotting random effects...

sjp.lmer(ma3, type = 'fe', sort = TRUE, p.kr = FALSE) # plot fixed effects
## Computing p-values via Wald-statistics approximation (treating t as Wald z).

Bootstrap CI’s for amplitude for figure for paper

# set number of bootstrap replicates for models
nbootSims2 = 1000

# using hive 5, since it's the one with the most data
pframe <- data.frame(treatment = levels(droplevels(new_df$treatment)), IT = ITmean, hive = factor(4, levels = levels(new_df$hive)),  beeID = 99999)
pframe$amp_acc <- 0
pp <- predict(ma3, newdata = pframe, re.form=NA, type = 'response') # re.form sets all random effects to 0


### Calculate CI's (using bootstrap, not accounting for random effects)
bb2 <- bootMer(ma3, FUN=function(x) predict(x, pframe, re.form=NA, type = 'response'), nsim = nbootSims2)
bb2_se <-apply(bb2$t,2,function(x) quantile(x, probs = c(0.025, 0.975)))
pframe$blo<-bb2_se[1,]
pframe$bhi<-bb2_se[2,]
pframe$predMean <- pp
pframe <- pframe[, c('treatment', "blo", "bhi", "predMean")]

Make amplitude plots for paper

# "Mean and bootstrap CI based on fixed-effects uncertainty ONLY"
# Holding IT = meanIT
print(ITmean)
## [1] 3.965833
ga0 <- ggplot(pframe, aes(x=treatment, y=predMean))+
     geom_point()+ 
     labs(y = expression ("Sonication amplitude "(m~s^{-2})), x = "Flower treatment") + 
     geom_errorbar(aes(ymin = blo, ymax = bhi), width = 0.1)+
     theme(axis.text.x = element_text(angle = 45, hjust = 1), 
           legend.position = 'none') +
     theme_classic() + 
  annotate(geom="text", x=c(1,2), y=c(0, 0) + 50, label=c("a", "b"),
                color="black") 
ga0

ggsave(plot = ga0, filename = file.path(figDir, "HeavyLight_PredsAndCI_amp.pdf"), width =4, height = 3)